1 package org.apache.lucene.analysis.core;
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20 import java.io.IOException;
21 import java.io.InputStream;
22 import java.io.Reader;
23 import java.io.StringReader;
24 import java.lang.reflect.Constructor;
25 import java.lang.reflect.InvocationTargetException;
26 import java.lang.reflect.Modifier;
27 import java.net.URI;
28 import java.net.URL;
29 import java.nio.CharBuffer;
30 import java.nio.file.DirectoryStream;
31 import java.nio.file.Files;
32 import java.nio.file.Path;
33 import java.nio.file.Paths;
34 import java.util.ArrayList;
35 import java.util.Arrays;
36 import java.util.Collection;
37 import java.util.Collections;
38 import java.util.Comparator;
39 import java.util.Enumeration;
40 import java.util.HashMap;
41 import java.util.HashSet;
42 import java.util.IdentityHashMap;
43 import java.util.List;
44 import java.util.Map;
45 import java.util.Random;
46 import java.util.Set;
47 import java.util.regex.Pattern;
48
49 import org.apache.lucene.analysis.Analyzer;
50 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
51 import org.apache.lucene.analysis.CachingTokenFilter;
52 import org.apache.lucene.analysis.CharFilter;
53 import org.apache.lucene.analysis.CrankyTokenFilter;
54 import org.apache.lucene.analysis.MockGraphTokenFilter;
55 import org.apache.lucene.analysis.MockRandomLookaheadTokenFilter;
56 import org.apache.lucene.analysis.MockTokenFilter;
57 import org.apache.lucene.analysis.MockTokenizer;
58 import org.apache.lucene.analysis.TokenFilter;
59 import org.apache.lucene.analysis.TokenStream;
60 import org.apache.lucene.analysis.Tokenizer;
61 import org.apache.lucene.analysis.ValidatingTokenFilter;
62 import org.apache.lucene.analysis.charfilter.NormalizeCharMap;
63 import org.apache.lucene.analysis.cjk.CJKBigramFilter;
64 import org.apache.lucene.analysis.commongrams.CommonGramsFilter;
65 import org.apache.lucene.analysis.commongrams.CommonGramsQueryFilter;
66 import org.apache.lucene.analysis.compound.Lucene43HyphenationCompoundWordTokenFilter;
67 import org.apache.lucene.analysis.compound.TestCompoundWordTokenFilter;
68 import org.apache.lucene.analysis.compound.hyphenation.HyphenationTree;
69 import org.apache.lucene.analysis.hunspell.Dictionary;
70 import org.apache.lucene.analysis.hunspell.TestHunspellStemFilter;
71 import org.apache.lucene.analysis.miscellaneous.HyphenatedWordsFilter;
72 import org.apache.lucene.analysis.miscellaneous.LimitTokenCountFilter;
73 import org.apache.lucene.analysis.miscellaneous.LimitTokenOffsetFilter;
74 import org.apache.lucene.analysis.miscellaneous.LimitTokenPositionFilter;
75 import org.apache.lucene.analysis.miscellaneous.StemmerOverrideFilter;
76 import org.apache.lucene.analysis.miscellaneous.StemmerOverrideFilter.StemmerOverrideMap;
77 import org.apache.lucene.analysis.miscellaneous.WordDelimiterFilter;
78 import org.apache.lucene.analysis.path.PathHierarchyTokenizer;
79 import org.apache.lucene.analysis.path.ReversePathHierarchyTokenizer;
80 import org.apache.lucene.analysis.payloads.IdentityEncoder;
81 import org.apache.lucene.analysis.payloads.PayloadEncoder;
82 import org.apache.lucene.analysis.snowball.TestSnowball;
83 import org.apache.lucene.analysis.standard.StandardTokenizer;
84 import org.apache.lucene.analysis.synonym.SynonymMap;
85 import org.apache.lucene.analysis.util.CharArrayMap;
86 import org.apache.lucene.analysis.util.CharArraySet;
87 import org.apache.lucene.analysis.wikipedia.WikipediaTokenizer;
88 import org.apache.lucene.util.AttributeFactory;
89 import org.apache.lucene.util.AttributeSource;
90 import org.apache.lucene.util.CharsRef;
91 import org.apache.lucene.util.Rethrow;
92 import org.apache.lucene.util.TestUtil;
93 import org.apache.lucene.util.Version;
94 import org.apache.lucene.util.automaton.CharacterRunAutomaton;
95 import org.junit.AfterClass;
96 import org.junit.BeforeClass;
97 import org.tartarus.snowball.SnowballProgram;
98 import org.xml.sax.InputSource;
99
100
101 public class TestRandomChains extends BaseTokenStreamTestCase {
102
  // Candidate constructors discovered by scanning the org.apache.lucene.analysis
  // packages; populated in beforeClass() and released in afterClass().
  static List<Constructor<? extends Tokenizer>> tokenizers;
  static List<Constructor<? extends TokenFilter>> tokenfilters;
  static List<Constructor<? extends CharFilter>> charfilters;
106
  /** Simple argument predicate (pre-Java-8 stand-in for {@code java.util.function.Predicate}). */
  private static interface Predicate<T> {
    boolean apply(T o);
  }
110
111 private static final Predicate<Object[]> ALWAYS = new Predicate<Object[]>() {
112 public boolean apply(Object[] args) {
113 return true;
114 };
115 };
116
117 private static final Map<Constructor<?>,Predicate<Object[]>> brokenConstructors = new HashMap<>();
118 static {
119 try {
120 brokenConstructors.put(
121 LimitTokenCountFilter.class.getConstructor(TokenStream.class, int.class),
122 ALWAYS);
123 brokenConstructors.put(
124 LimitTokenCountFilter.class.getConstructor(TokenStream.class, int.class, boolean.class),
125 new Predicate<Object[]>() {
126 @Override
127 public boolean apply(Object[] args) {
128 assert args.length == 3;
129 return !((Boolean) args[2]);
130 }
131 });
132 brokenConstructors.put(
133 LimitTokenOffsetFilter.class.getConstructor(TokenStream.class, int.class),
134 ALWAYS);
135 brokenConstructors.put(
136 LimitTokenOffsetFilter.class.getConstructor(TokenStream.class, int.class, boolean.class),
137 new Predicate<Object[]>() {
138 @Override
139 public boolean apply(Object[] args) {
140 assert args.length == 3;
141 return !((Boolean) args[2]);
142 }
143 });
144 brokenConstructors.put(
145 LimitTokenPositionFilter.class.getConstructor(TokenStream.class, int.class),
146 ALWAYS);
147 brokenConstructors.put(
148 LimitTokenPositionFilter.class.getConstructor(TokenStream.class, int.class, boolean.class),
149 new Predicate<Object[]>() {
150 @Override
151 public boolean apply(Object[] args) {
152 assert args.length == 3;
153 return !((Boolean) args[2]);
154 }
155 });
156 for (Class<?> c : Arrays.<Class<?>>asList(
157
158
159
160 CachingTokenFilter.class,
161
162 CrankyTokenFilter.class,
163
164
165 ValidatingTokenFilter.class,
166
167 WordDelimiterFilter.class)) {
168 for (Constructor<?> ctor : c.getConstructors()) {
169 brokenConstructors.put(ctor, ALWAYS);
170 }
171 }
172 } catch (Exception e) {
173 throw new Error(e);
174 }
175 }
176
177
178
179 private static final Map<Constructor<?>,Predicate<Object[]>> brokenOffsetsConstructors = new HashMap<>();
180 static {
181 try {
182 for (Class<?> c : Arrays.<Class<?>>asList(
183 ReversePathHierarchyTokenizer.class,
184 PathHierarchyTokenizer.class,
185
186 WikipediaTokenizer.class,
187
188 CJKBigramFilter.class,
189
190 HyphenatedWordsFilter.class,
191
192 CommonGramsFilter.class,
193
194 CommonGramsQueryFilter.class)) {
195 for (Constructor<?> ctor : c.getConstructors()) {
196 brokenOffsetsConstructors.put(ctor, ALWAYS);
197 }
198 }
199 } catch (Exception e) {
200 throw new Error(e);
201 }
202 }
203
204 @BeforeClass
205 public static void beforeClass() throws Exception {
206 List<Class<?>> analysisClasses = getClassesForPackage("org.apache.lucene.analysis");
207 tokenizers = new ArrayList<>();
208 tokenfilters = new ArrayList<>();
209 charfilters = new ArrayList<>();
210 for (final Class<?> c : analysisClasses) {
211 final int modifiers = c.getModifiers();
212 if (
213
214 Modifier.isAbstract(modifiers) || !Modifier.isPublic(modifiers)
215 || c.isSynthetic() || c.isAnonymousClass() || c.isMemberClass() || c.isInterface()
216 || c.isAnnotationPresent(Deprecated.class)
217 || !(Tokenizer.class.isAssignableFrom(c) || TokenFilter.class.isAssignableFrom(c) || CharFilter.class.isAssignableFrom(c))
218 ) {
219 continue;
220 }
221
222 for (final Constructor<?> ctor : c.getConstructors()) {
223
224 if (ctor.isSynthetic() || ctor.isAnnotationPresent(Deprecated.class) || brokenConstructors.get(ctor) == ALWAYS) {
225 continue;
226 }
227 if (Tokenizer.class.isAssignableFrom(c)) {
228 assertTrue(ctor.toGenericString() + " has unsupported parameter types",
229 allowedTokenizerArgs.containsAll(Arrays.asList(ctor.getParameterTypes())));
230 tokenizers.add(castConstructor(Tokenizer.class, ctor));
231 } else if (TokenFilter.class.isAssignableFrom(c)) {
232 assertTrue(ctor.toGenericString() + " has unsupported parameter types",
233 allowedTokenFilterArgs.containsAll(Arrays.asList(ctor.getParameterTypes())));
234 tokenfilters.add(castConstructor(TokenFilter.class, ctor));
235 } else if (CharFilter.class.isAssignableFrom(c)) {
236 assertTrue(ctor.toGenericString() + " has unsupported parameter types",
237 allowedCharFilterArgs.containsAll(Arrays.asList(ctor.getParameterTypes())));
238 charfilters.add(castConstructor(CharFilter.class, ctor));
239 } else {
240 fail("Cannot get here");
241 }
242 }
243 }
244
245 final Comparator<Constructor<?>> ctorComp = new Comparator<Constructor<?>>() {
246 @Override
247 public int compare(Constructor<?> arg0, Constructor<?> arg1) {
248 return arg0.toGenericString().compareTo(arg1.toGenericString());
249 }
250 };
251 Collections.sort(tokenizers, ctorComp);
252 Collections.sort(tokenfilters, ctorComp);
253 Collections.sort(charfilters, ctorComp);
254 if (VERBOSE) {
255 System.out.println("tokenizers = " + tokenizers);
256 System.out.println("tokenfilters = " + tokenfilters);
257 System.out.println("charfilters = " + charfilters);
258 }
259 }
260
  /** Releases the discovered constructor lists after the suite finishes. */
  @AfterClass
  public static void afterClass() {
    tokenizers = null;
    tokenfilters = null;
    charfilters = null;
  }
267
268
269
  /**
   * Unchecked cast of a {@code Constructor<?>} to {@code Constructor<T>}; safe
   * here because beforeClass() first checks {@code instanceClazz.isAssignableFrom}
   * on the constructor's declaring class.
   */
  @SuppressWarnings("unchecked")
  private static <T> Constructor<T> castConstructor(Class<T> instanceClazz, Constructor<?> ctor) {
    return (Constructor<T>) ctor;
  }
274
275 public static List<Class<?>> getClassesForPackage(String pckgname) throws Exception {
276 final List<Class<?>> classes = new ArrayList<>();
277 collectClassesForPackage(pckgname, classes);
278 assertFalse("No classes found in package '"+pckgname+"'; maybe your test classes are packaged as JAR file?", classes.isEmpty());
279 return classes;
280 }
281
282 private static void collectClassesForPackage(String pckgname, List<Class<?>> classes) throws Exception {
283 final ClassLoader cld = TestRandomChains.class.getClassLoader();
284 final String path = pckgname.replace('.', '/');
285 final Enumeration<URL> resources = cld.getResources(path);
286 while (resources.hasMoreElements()) {
287 final URI uri = resources.nextElement().toURI();
288 if (!"file".equalsIgnoreCase(uri.getScheme()))
289 continue;
290 final Path directory = Paths.get(uri);
291 if (Files.exists(directory)) {
292 try (DirectoryStream<Path> stream = Files.newDirectoryStream(directory)) {
293 for (Path file : stream) {
294 if (Files.isDirectory(file)) {
295
296 String subPackage = pckgname + "." + file.getFileName().toString();
297 collectClassesForPackage(subPackage, classes);
298 }
299 String fname = file.getFileName().toString();
300 if (fname.endsWith(".class")) {
301 String clazzName = fname.substring(0, fname.length() - 6);
302
303
304 if (!clazzName.endsWith("Test") && !clazzName.startsWith("Test")) {
305
306
307 classes.add(Class.forName(pckgname + '.' + clazzName, false, cld));
308 }
309 }
310 }
311 }
312 }
313 }
314 }
315
  /** Produces one random argument value of a particular type for a component constructor. */
  private static interface ArgProducer {
    Object create(Random random);
  }
319
320 private static final Map<Class<?>,ArgProducer> argProducers = new IdentityHashMap<Class<?>,ArgProducer>() {{
321 put(int.class, new ArgProducer() {
322 @Override public Object create(Random random) {
323
324
325
326 return Integer.valueOf(TestUtil.nextInt(random, -50, 50));
327 }
328 });
329 put(char.class, new ArgProducer() {
330 @Override public Object create(Random random) {
331
332
333
334 while(true) {
335 char c = (char)random.nextInt(65536);
336 if (c < '\uD800' || c > '\uDFFF') {
337 return Character.valueOf(c);
338 }
339 }
340 }
341 });
342 put(float.class, new ArgProducer() {
343 @Override public Object create(Random random) {
344 return Float.valueOf(random.nextFloat());
345 }
346 });
347 put(boolean.class, new ArgProducer() {
348 @Override public Object create(Random random) {
349 return Boolean.valueOf(random.nextBoolean());
350 }
351 });
352 put(byte.class, new ArgProducer() {
353 @Override public Object create(Random random) {
354
355 return Byte.valueOf((byte) random.nextInt(256));
356 }
357 });
358 put(byte[].class, new ArgProducer() {
359 @Override public Object create(Random random) {
360 byte bytes[] = new byte[random.nextInt(256)];
361 random.nextBytes(bytes);
362 return bytes;
363 }
364 });
365 put(Random.class, new ArgProducer() {
366 @Override public Object create(Random random) {
367 return new Random(random.nextLong());
368 }
369 });
370 put(Version.class, new ArgProducer() {
371 @Override public Object create(Random random) {
372
373 return Version.LATEST;
374 }
375 });
376 put(AttributeFactory.class, new ArgProducer() {
377 @Override public Object create(Random random) {
378 return newAttributeFactory(random);
379 }
380 });
381 put(Set.class, new ArgProducer() {
382 @Override public Object create(Random random) {
383
384 Set<String> set = new HashSet<>();
385 int num = random.nextInt(5);
386 for (int i = 0; i < num; i++) {
387 set.add(StandardTokenizer.TOKEN_TYPES[random.nextInt(StandardTokenizer.TOKEN_TYPES.length)]);
388 }
389 return set;
390 }
391 });
392 put(Collection.class, new ArgProducer() {
393 @Override public Object create(Random random) {
394
395 Collection<char[]> col = new ArrayList<>();
396 int num = random.nextInt(5);
397 for (int i = 0; i < num; i++) {
398 col.add(TestUtil.randomSimpleString(random).toCharArray());
399 }
400 return col;
401 }
402 });
403 put(CharArraySet.class, new ArgProducer() {
404 @Override public Object create(Random random) {
405 int num = random.nextInt(10);
406 CharArraySet set = new CharArraySet(num, random.nextBoolean());
407 for (int i = 0; i < num; i++) {
408
409 set.add(TestUtil.randomSimpleString(random));
410 }
411 return set;
412 }
413 });
414 put(Pattern.class, new ArgProducer() {
415 @Override public Object create(Random random) {
416
417
418 return Pattern.compile("a");
419 }
420 });
421
422 put(Pattern[].class, new ArgProducer() {
423 @Override public Object create(Random random) {
424 return new Pattern[] {Pattern.compile("([a-z]+)"), Pattern.compile("([0-9]+)")};
425 }
426 });
427 put(PayloadEncoder.class, new ArgProducer() {
428 @Override public Object create(Random random) {
429 return new IdentityEncoder();
430 }
431 });
432 put(Dictionary.class, new ArgProducer() {
433 @Override public Object create(Random random) {
434
435 InputStream affixStream = TestHunspellStemFilter.class.getResourceAsStream("simple.aff");
436 InputStream dictStream = TestHunspellStemFilter.class.getResourceAsStream("simple.dic");
437 try {
438 return new Dictionary(affixStream, dictStream);
439 } catch (Exception ex) {
440 Rethrow.rethrow(ex);
441 return null;
442 }
443 }
444 });
445 put(HyphenationTree.class, new ArgProducer() {
446 @Override public Object create(Random random) {
447
448 try {
449 InputSource is = new InputSource(TestCompoundWordTokenFilter.class.getResource("da_UTF8.xml").toExternalForm());
450 HyphenationTree hyphenator = Lucene43HyphenationCompoundWordTokenFilter.getHyphenationTree(is);
451 return hyphenator;
452 } catch (Exception ex) {
453 Rethrow.rethrow(ex);
454 return null;
455 }
456 }
457 });
458 put(SnowballProgram.class, new ArgProducer() {
459 @Override public Object create(Random random) {
460 try {
461 String lang = TestSnowball.SNOWBALL_LANGS[random.nextInt(TestSnowball.SNOWBALL_LANGS.length)];
462 Class<? extends SnowballProgram> clazz = Class.forName("org.tartarus.snowball.ext." + lang + "Stemmer").asSubclass(SnowballProgram.class);
463 return clazz.newInstance();
464 } catch (Exception ex) {
465 Rethrow.rethrow(ex);
466 return null;
467 }
468 }
469 });
470 put(String.class, new ArgProducer() {
471 @Override public Object create(Random random) {
472
473 if (random.nextBoolean()) {
474
475 return StandardTokenizer.TOKEN_TYPES[random.nextInt(StandardTokenizer.TOKEN_TYPES.length)];
476 } else {
477 return TestUtil.randomSimpleString(random);
478 }
479 }
480 });
481 put(NormalizeCharMap.class, new ArgProducer() {
482 @Override public Object create(Random random) {
483 NormalizeCharMap.Builder builder = new NormalizeCharMap.Builder();
484
485 Set<String> keys = new HashSet<>();
486 int num = random.nextInt(5);
487
488 for (int i = 0; i < num; i++) {
489 String key = TestUtil.randomSimpleString(random);
490 if (!keys.contains(key) && key.length() > 0) {
491 String value = TestUtil.randomSimpleString(random);
492 builder.add(key, value);
493 keys.add(key);
494
495 }
496 }
497 return builder.build();
498 }
499 });
500 put(CharacterRunAutomaton.class, new ArgProducer() {
501 @Override public Object create(Random random) {
502
503 switch(random.nextInt(5)) {
504 case 0: return MockTokenizer.KEYWORD;
505 case 1: return MockTokenizer.SIMPLE;
506 case 2: return MockTokenizer.WHITESPACE;
507 case 3: return MockTokenFilter.EMPTY_STOPSET;
508 default: return MockTokenFilter.ENGLISH_STOPSET;
509 }
510 }
511 });
512 put(CharArrayMap.class, new ArgProducer() {
513 @Override public Object create(Random random) {
514 int num = random.nextInt(10);
515 CharArrayMap<String> map = new CharArrayMap<>(num, random.nextBoolean());
516 for (int i = 0; i < num; i++) {
517
518 map.put(TestUtil.randomSimpleString(random), TestUtil.randomSimpleString(random));
519 }
520 return map;
521 }
522 });
523 put(StemmerOverrideMap.class, new ArgProducer() {
524 @Override public Object create(Random random) {
525 int num = random.nextInt(10);
526 StemmerOverrideFilter.Builder builder = new StemmerOverrideFilter.Builder(random.nextBoolean());
527 for (int i = 0; i < num; i++) {
528 String input = "";
529 do {
530 input = TestUtil.randomRealisticUnicodeString(random);
531 } while(input.isEmpty());
532 String out = ""; TestUtil.randomSimpleString(random);
533 do {
534 out = TestUtil.randomRealisticUnicodeString(random);
535 } while(out.isEmpty());
536 builder.add(input, out);
537 }
538 try {
539 return builder.build();
540 } catch (Exception ex) {
541 Rethrow.rethrow(ex);
542 return null;
543 }
544 }
545 });
546 put(SynonymMap.class, new ArgProducer() {
547 @Override public Object create(Random random) {
548 SynonymMap.Builder b = new SynonymMap.Builder(random.nextBoolean());
549 final int numEntries = atLeast(10);
550 for (int j = 0; j < numEntries; j++) {
551 addSyn(b, randomNonEmptyString(random), randomNonEmptyString(random), random.nextBoolean());
552 }
553 try {
554 return b.build();
555 } catch (Exception ex) {
556 Rethrow.rethrow(ex);
557 return null;
558 }
559 }
560
561 private void addSyn(SynonymMap.Builder b, String input, String output, boolean keepOrig) {
562 b.add(new CharsRef(input.replaceAll(" +", "\u0000")),
563 new CharsRef(output.replaceAll(" +", "\u0000")),
564 keepOrig);
565 }
566
567 private String randomNonEmptyString(Random random) {
568 while(true) {
569 final String s = TestUtil.randomUnicodeString(random).trim();
570 if (s.length() != 0 && s.indexOf('\u0000') == -1) {
571 return s;
572 }
573 }
574 }
575 });
576 }};
577
  // Parameter types that each component category is allowed to declare; any
  // constructor with a parameter outside these sets fails beforeClass().
  // Identity sets, matching the IdentityHashMap of argProducers.
  static final Set<Class<?>> allowedTokenizerArgs, allowedTokenFilterArgs, allowedCharFilterArgs;
  static {
    allowedTokenizerArgs = Collections.newSetFromMap(new IdentityHashMap<Class<?>,Boolean>());
    allowedTokenizerArgs.addAll(argProducers.keySet());
    allowedTokenizerArgs.add(Reader.class);
    allowedTokenizerArgs.add(AttributeFactory.class);
    allowedTokenizerArgs.add(AttributeSource.class);

    allowedTokenFilterArgs = Collections.newSetFromMap(new IdentityHashMap<Class<?>,Boolean>());
    allowedTokenFilterArgs.addAll(argProducers.keySet());
    allowedTokenFilterArgs.add(TokenStream.class);
    // allowed so CommonGramsQueryFilter can be built: newFilterArgs() wraps the
    // current stream in a CommonGramsFilter for this parameter type
    allowedTokenFilterArgs.add(CommonGramsFilter.class);

    allowedCharFilterArgs = Collections.newSetFromMap(new IdentityHashMap<Class<?>,Boolean>());
    allowedCharFilterArgs.addAll(argProducers.keySet());
    allowedCharFilterArgs.add(Reader.class);
  }
596
  /**
   * Creates a random argument of {@code paramType} via the registered
   * {@link ArgProducer}; fails the test if no producer is registered.
   */
  @SuppressWarnings("unchecked")
  static <T> T newRandomArg(Random random, Class<T> paramType) {
    final ArgProducer producer = argProducers.get(paramType);
    assertNotNull("No producer for arguments of type " + paramType.getName() + " found", producer);
    return (T) producer.create(random);
  }
603
604 static Object[] newTokenizerArgs(Random random, Class<?>[] paramTypes) {
605 Object[] args = new Object[paramTypes.length];
606 for (int i = 0; i < args.length; i++) {
607 Class<?> paramType = paramTypes[i];
608 if (paramType == AttributeSource.class) {
609
610
611 args[i] = null;
612 } else {
613 args[i] = newRandomArg(random, paramType);
614 }
615 }
616 return args;
617 }
618
619 static Object[] newCharFilterArgs(Random random, Reader reader, Class<?>[] paramTypes) {
620 Object[] args = new Object[paramTypes.length];
621 for (int i = 0; i < args.length; i++) {
622 Class<?> paramType = paramTypes[i];
623 if (paramType == Reader.class) {
624 args[i] = reader;
625 } else {
626 args[i] = newRandomArg(random, paramType);
627 }
628 }
629 return args;
630 }
631
632 static Object[] newFilterArgs(Random random, TokenStream stream, Class<?>[] paramTypes) {
633 Object[] args = new Object[paramTypes.length];
634 for (int i = 0; i < args.length; i++) {
635 Class<?> paramType = paramTypes[i];
636 if (paramType == TokenStream.class) {
637 args[i] = stream;
638 } else if (paramType == CommonGramsFilter.class) {
639
640 args[i] = new CommonGramsFilter(stream, newRandomArg(random, CharArraySet.class));
641 } else {
642 args[i] = newRandomArg(random, paramType);
643 }
644 }
645 return args;
646 }
647
  /**
   * Analyzer that builds a random chain (charfilters, tokenizer, tokenfilters)
   * deterministically from a seed.  Each method re-derives the identical chain
   * by replaying the same Random sequence from the seed, so createComponents,
   * initReader, toString and offsetsAreCorrect all describe the same chain.
   */
  static class MockRandomAnalyzer extends Analyzer {
    final long seed;

    MockRandomAnalyzer(long seed) {
      this.seed = seed;
    }

    /** Rebuilds the chain from the seed and reports whether its offsets are expected to be correct. */
    public boolean offsetsAreCorrect() {
      Random random = new Random(seed);
      TokenizerSpec tokenizerSpec = newTokenizer(random);
      TokenFilterSpec filterSpec = newFilterChain(random, tokenizerSpec.tokenizer, tokenizerSpec.offsetsAreCorrect);
      return filterSpec.offsetsAreCorrect;
    }

    @Override
    protected TokenStreamComponents createComponents(String fieldName) {
      // fresh Random from the same seed: reproduces the exact chain every call
      Random random = new Random(seed);
      TokenizerSpec tokenizerSpec = newTokenizer(random);

      TokenFilterSpec filterSpec = newFilterChain(random, tokenizerSpec.tokenizer, tokenizerSpec.offsetsAreCorrect);

      return new TokenStreamComponents(tokenizerSpec.tokenizer, filterSpec.stream);
    }

    @Override
    protected Reader initReader(String fieldName, Reader reader) {
      // charfilter chain is derived from its own replay of the seed
      Random random = new Random(seed);
      CharFilterSpec charfilterspec = newCharFilterChain(random, reader);
      return charfilterspec.reader;
    }

    @Override
    public String toString() {
      Random random = new Random(seed);
      StringBuilder sb = new StringBuilder();
      CharFilterSpec charFilterSpec = newCharFilterChain(random, new StringReader(""));
      sb.append("\ncharfilters=");
      sb.append(charFilterSpec.toString);
      // re-seed: the tokenizer/filter chain replays from the start of the
      // sequence, mirroring how createComponents and initReader each get a
      // fresh Random
      random = new Random(seed);
      TokenizerSpec tokenizerSpec = newTokenizer(random);
      sb.append("\n");
      sb.append("tokenizer=");
      sb.append(tokenizerSpec.toString);
      TokenFilterSpec tokenFilterSpec = newFilterChain(random, tokenizerSpec.tokenizer, tokenizerSpec.offsetsAreCorrect);
      sb.append("\n");
      sb.append("filters=");
      sb.append(tokenFilterSpec.toString);
      sb.append("\n");
      sb.append("offsetsAreCorrect=" + tokenFilterSpec.offsetsAreCorrect);
      return sb.toString();
    }

    /**
     * Instantiates a component via reflection, appending a description of the
     * call to {@code descr}.  Returns null (so the caller retries with a
     * different constructor/arguments) when the component rejects the random
     * arguments with IAE/UOE.
     */
    private <T> T createComponent(Constructor<T> ctor, Object[] args, StringBuilder descr) {
      try {
        final T instance = ctor.newInstance(args);
        descr.append("\n  ");
        descr.append(ctor.getDeclaringClass().getName());
        String params = Arrays.deepToString(args);
        params = params.substring(1, params.length()-1);
        descr.append("(").append(params).append(")");
        return instance;
      } catch (InvocationTargetException ite) {
        final Throwable cause = ite.getCause();
        if (cause instanceof IllegalArgumentException ||
            cause instanceof UnsupportedOperationException) {
          // everything else is probably a bug, but components may legitimately
          // reject unsupported random arguments
          if (VERBOSE) {
            System.err.println("Ignoring IAE/UOE from ctor:");
            cause.printStackTrace(System.err);
          }
        } else {
          Rethrow.rethrow(cause);
        }
      } catch (IllegalAccessException | InstantiationException iae) {
        Rethrow.rethrow(iae);
      }
      return null;
    }

    /** True if this constructor+argument combination is excluded outright. */
    private boolean broken(Constructor<?> ctor, Object[] args) {
      final Predicate<Object[]> pred = brokenConstructors.get(ctor);
      return pred != null && pred.apply(args);
    }

    /** True if this constructor+argument combination produces incorrect offsets. */
    private boolean brokenOffsets(Constructor<?> ctor, Object[] args) {
      final Predicate<Object[]> pred = brokenOffsetsConstructors.get(ctor);
      return pred != null && pred.apply(args);
    }

    /** Picks random tokenizer constructors until one instantiates successfully. */
    private TokenizerSpec newTokenizer(Random random) {
      TokenizerSpec spec = new TokenizerSpec();
      while (spec.tokenizer == null) {
        final Constructor<? extends Tokenizer> ctor = tokenizers.get(random.nextInt(tokenizers.size()));
        final StringBuilder descr = new StringBuilder();
        final Object args[] = newTokenizerArgs(random, ctor.getParameterTypes());
        if (broken(ctor, args)) {
          continue;
        }
        spec.tokenizer = createComponent(ctor, args, descr);
        if (spec.tokenizer != null) {
          spec.offsetsAreCorrect &= !brokenOffsets(ctor, args);
          spec.toString = descr.toString();
        }
      }
      return spec;
    }

    /** Wraps the reader in 0-2 randomly chosen charfilters. */
    private CharFilterSpec newCharFilterChain(Random random, Reader reader) {
      CharFilterSpec spec = new CharFilterSpec();
      spec.reader = reader;
      StringBuilder descr = new StringBuilder();
      int numFilters = random.nextInt(3);
      for (int i = 0; i < numFilters; i++) {
        while (true) {
          final Constructor<? extends CharFilter> ctor = charfilters.get(random.nextInt(charfilters.size()));
          final Object args[] = newCharFilterArgs(random, spec.reader, ctor.getParameterTypes());
          if (broken(ctor, args)) {
            continue;
          }
          reader = createComponent(ctor, args, descr);
          if (reader != null) {
            spec.reader = reader;
            break;
          }
        }
      }
      spec.toString = descr.toString();
      return spec;
    }

    /**
     * Wraps the tokenizer in 0-4 randomly chosen tokenfilters, inserting a
     * ValidatingTokenFilter between every stage (and after the last) so that
     * contract violations are pinpointed to the offending filter.
     */
    private TokenFilterSpec newFilterChain(Random random, Tokenizer tokenizer, boolean offsetsAreCorrect) {
      TokenFilterSpec spec = new TokenFilterSpec();
      spec.offsetsAreCorrect = offsetsAreCorrect;
      spec.stream = tokenizer;
      StringBuilder descr = new StringBuilder();
      int numFilters = random.nextInt(5);
      for (int i = 0; i < numFilters; i++) {

        // validate the stage built so far before stacking the next filter
        spec.stream = new ValidatingTokenFilter(spec.stream, "stage " + i, spec.offsetsAreCorrect);

        while (true) {
          final Constructor<? extends TokenFilter> ctor = tokenfilters.get(random.nextInt(tokenfilters.size()));

          // skip the mock lookahead filters once offsets are known-broken;
          // NOTE(review): inherited exclusion, presumably because they rely on
          // correct offsets -- confirm
          if (!spec.offsetsAreCorrect &&
              (ctor.getDeclaringClass().equals(MockGraphTokenFilter.class)
               || ctor.getDeclaringClass().equals(MockRandomLookaheadTokenFilter.class))) {
            continue;
          }

          final Object args[] = newFilterArgs(random, spec.stream, ctor.getParameterTypes());
          if (broken(ctor, args)) {
            continue;
          }
          final TokenFilter flt = createComponent(ctor, args, descr);
          if (flt != null) {
            spec.offsetsAreCorrect &= !brokenOffsets(ctor, args);
            spec.stream = flt;
            break;
          }
        }
      }

      // final validation stage at the end of the chain
      spec.stream = new ValidatingTokenFilter(spec.stream, "last stage", spec.offsetsAreCorrect);

      spec.toString = descr.toString();
      return spec;
    }
  }
832
833 static class CheckThatYouDidntReadAnythingReaderWrapper extends CharFilter {
834 boolean readSomething;
835
836 CheckThatYouDidntReadAnythingReaderWrapper(Reader in) {
837 super(in);
838 }
839
840 @Override
841 public int correct(int currentOff) {
842 return currentOff;
843 }
844
845 @Override
846 public int read(char[] cbuf, int off, int len) throws IOException {
847 readSomething = true;
848 return input.read(cbuf, off, len);
849 }
850
851 @Override
852 public int read() throws IOException {
853 readSomething = true;
854 return input.read();
855 }
856
857 @Override
858 public int read(CharBuffer target) throws IOException {
859 readSomething = true;
860 return input.read(target);
861 }
862
863 @Override
864 public int read(char[] cbuf) throws IOException {
865 readSomething = true;
866 return input.read(cbuf);
867 }
868
869 @Override
870 public long skip(long n) throws IOException {
871 readSomething = true;
872 return input.skip(n);
873 }
874
875 @Override
876 public void mark(int readAheadLimit) throws IOException {
877 input.mark(readAheadLimit);
878 }
879
880 @Override
881 public boolean markSupported() {
882 return input.markSupported();
883 }
884
885 @Override
886 public boolean ready() throws IOException {
887 return input.ready();
888 }
889
890 @Override
891 public void reset() throws IOException {
892 input.reset();
893 }
894 }
895
  /** Result of building a random tokenizer: the instance, its description, and offset correctness. */
  static class TokenizerSpec {
    Tokenizer tokenizer;
    String toString;  // human-readable description of the constructed component
    boolean offsetsAreCorrect = true;
  }
901
  /** Result of building a random tokenfilter chain over a tokenizer. */
  static class TokenFilterSpec {
    TokenStream stream;
    String toString;  // human-readable description of the constructed chain
    boolean offsetsAreCorrect = true;
  }
907
  /** Result of building a random charfilter chain around a reader. */
  static class CharFilterSpec {
    Reader reader;
    String toString;  // human-readable description of the constructed chain
  }
912
913 public void testRandomChains() throws Throwable {
914 int numIterations = TEST_NIGHTLY ? atLeast(20) : 3;
915 Random random = random();
916 for (int i = 0; i < numIterations; i++) {
917 try (MockRandomAnalyzer a = new MockRandomAnalyzer(random.nextLong())) {
918 if (VERBOSE) {
919 System.out.println("Creating random analyzer:" + a);
920 }
921 try {
922 checkRandomData(random, a, 500*RANDOM_MULTIPLIER, 20, false,
923 false );
924 } catch (Throwable e) {
925 System.err.println("Exception from random analyzer: " + a);
926 throw e;
927 }
928 }
929 }
930 }
931
932
933 public void testRandomChainsWithLargeStrings() throws Throwable {
934 int numIterations = TEST_NIGHTLY ? atLeast(20) : 3;
935 Random random = random();
936 for (int i = 0; i < numIterations; i++) {
937 try (MockRandomAnalyzer a = new MockRandomAnalyzer(random.nextLong())) {
938 if (VERBOSE) {
939 System.out.println("Creating random analyzer:" + a);
940 }
941 try {
942 checkRandomData(random, a, 50*RANDOM_MULTIPLIER, 80, false,
943 false );
944 } catch (Throwable e) {
945 System.err.println("Exception from random analyzer: " + a);
946 throw e;
947 }
948 }
949 }
950 }
951 }